---
title: "Power, Proximity, Parity"
output: pdf_document
---

```{r}

#This R file replicates Tables 1 & 2 from the main text, and also yields the alternative tables analogous to these two found in the online appendix

rm(list = ls())

#Load Packages
library(fmsb)
library(miceadds)
library(foreign)
library(readstata13)
library(dplyr)
library(geosphere)
library(ggplot2)
library(readxl)
library(tidyr)
library(lubridate)
library(sf)
library(sjlabelled)
library(ggeffects)
library(patchwork)
library(numbers)
library(rlist)
library(extrafont)
library(cleaner)
library(parallel)
library(Rlab)
library(texreg)
library(tictoc)
library(scales)
library(tidyverse)
library(dplyr)
library(caret)
library(pROC)
set.seed(1)
```






```{r import-data}

#Import Relevant Data
nmc <- read.csv("../Data/NMC_5_0/NMC_5_0.csv") #CINC Scores
dyad_mids <- read.dta13("../Data/dyadic_mid_4.02.dta") #Dyadic MIDs data
dyad_mids <- subset(dyad_mids, dyad_mids$strtyr == year) #Limit multi-year MIDs to first year only
locations <- read.csv("../Data/MIDLOC_2.1/MIDLOCA_2.1.csv") #MID Location Data
caploc <- read.csv("../Data/latlong.csv", header=FALSE) #State capital locations
names(caploc) <- c("ccode", "country", "lat", "lat2", "long", "long2")
contig <- read.csv("../Data/DirectContiguity320/contdird.csv") #State contiguity data
alliances <- read.csv("../Data/version4.1_csv/alliance_v4.1_by_directed_yearly.csv") #Alliance data
dem <- read_xls("../Data/p5v2018.xls") #Regime type data
nmc_polity <- read.csv("../Data/nmc_polity.csv") #CINC and Regime type data combined


```


```{r}
#Actual Uses of Force Only

dyad_mids <- subset(dyad_mids, dyad_mids$hihost > 3) #If desire to use all MIDs as DV, set to "0"
```


```{r add-lag}
#Include a lag for contiguity, CINC, and regime type data
contig$year <- contig$year + 1
nmc_polity$year <- nmc_polity$year + 1
```

```{r no-minutes}
#Combined capital lat long data into single variables for each
caploc$caplat <- caploc$lat + caploc$lat2/60
caploc$caplong <- caploc$long + caploc$long2 / 60
caploc <- caploc %>% distinct(ccode, .keep_all = TRUE)
```

```{r generate-negatives}

#Generate "negative" observations---i.e. non-events

States <- read.csv("../Data/states2016.csv") #<- still most recent
States %>% 
  dplyr::select(ccode, styear, endyear) %>% 
  expand(ccode1=ccode, ccode2=ccode, year=seq(1816,2010)) %>% 
  dplyr::filter(ccode1!=ccode2) %>% 
  dplyr::left_join(., States, by=c("ccode1"="ccode")) %>%
  dplyr::filter(year >= styear & year <= endyear) %>%
  dplyr::select(-styear,-endyear) %>% 
  dplyr::left_join(., States, by=c("ccode2"="ccode")) %>%
  dplyr::filter(year >= styear & year <= endyear) %>%
  dplyr::select(ccode1, ccode2, year) -> DDY

sample_list <- sample_n(DDY, 1250000)

lat <- runif(1250000, min = -90, max = 90)
long <- runif(1250000, min = -180, max = 180)

sample_list$month <- round(runif(1250000, 1, 12))
sample_list$day <- NA
sample_list$day <- ifelse(sample_list$month %in% c(1, 3, 5, 7, 8, 10), round(runif(1250000, 1, 31)), sample_list$day)
sample_list$day <- ifelse(sample_list$month %in% c(4, 6, 9, 11), round(runif(1250000, 1, 30)), sample_list$day)
sample_list$day <- ifelse(sample_list$month == 2 & (sample_list$year %% 4) == 0 & sample_list$year != 2000, round(runif(1250000, 1, 29)), sample_list$day)
sample_list$day <- ifelse(is.na(sample_list$day), round(runif(1250000, 1, 28)), sample_list$day)

neg <- cbind(sample_list, lat, long)
```







```{r major-powers}

#Include Major Power Data
major <- function(mids, year, month, day, statea, stateb)
{
  mids$maj1 <- 0
  mids$maj2 <- 0
  mids$date <- paste(mids[[year]],mids[[month]],mids[[day]], sep = "-")
  mids$date <- ymd(mids$date)

  mids$maj1 <- ifelse(mids[[statea]] == "2" & mids$date > ymd("1899-8-13"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "200" & mids$date > ymd("1817-1-1"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "220" & mids$date > ymd("1817-1-1") & mids$date < ymd("1941-6-22"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "220" & mids$date > ymd("1946-8-15"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "255" & mids$date > ymd("1817-1-1") & mids$date < ymd("1919-11-11"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "255" & mids$date > ymd("1926-1-1") & mids$date < ymd("1946-5-7"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "255" & mids$date > ymd("1992-12-11"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "300" & mids$date > ymd("1817-1-1") & mids$date < ymd("1919-11-3"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "325" & mids$date > ymd("1861-1-1") & mids$date < ymd("1944-9-2"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "365" & mids$date > ymd("1817-1-1") & mids$date < ymd("1918-1-5"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "365" & mids$date > ymd("1923-1-1"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "710" & mids$date > ymd("1951-1-1"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "740" & mids$date > ymd("1896-4-1") & mids$date < ymd("1946-8-14"), 1, mids$maj1)
  mids$maj1 <- ifelse(mids[[statea]] == "740" & mids$date > ymd("1992-11-1"), 1, mids$maj1)

  mids$maj2 <- ifelse(mids[[stateb]] == "2" & mids$date > ymd("1899-8-13"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "200" & mids$date > ymd("1817-1-1"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "220" & mids$date > ymd("1817-1-1") & mids$date < ymd("1941-6-22"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "220" & mids$date > ymd("1946-8-15"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "255" & mids$date > ymd("1817-1-1") & mids$date < ymd("1919-11-11"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "255" & mids$date > ymd("1926-1-1") & mids$date < ymd("1946-5-7"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "255" & mids$date > ymd("1992-12-11"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "300" & mids$date > ymd("1817-1-1") & mids$date < ymd("1919-11-3"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "325" & mids$date > ymd("1861-1-1") & mids$date < ymd("1944-9-2"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "365" & mids$date > ymd("1817-1-1") & mids$date < ymd("1918-1-5"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "365" & mids$date > ymd("1923-1-1"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "710" & mids$date > ymd("1951-1-1"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "740" & mids$date > ymd("1896-4-1") & mids$date < ymd("1946-8-14"), 1, mids$maj2)
  mids$maj2 <- ifelse(mids[[stateb]] == "740" & mids$date > ymd("1992-11-1"), 1, mids$maj2)

  return(mids)
}

dyad_mids <- major(dyad_mids, "strtyr", "strtmnth", "strtday", "statea", "stateb")
neg <- major(neg, "year", "month", "day", "ccode1", "ccode2")
```

```{r target-initiator}

#Include variable for whether the state initiated the use of force
dyad_mids$targeta <- ifelse(dyad_mids$rolea == 3 | dyad_mids$rolea == 4, 1, 0)
dyad_mids$initiatora <- ifelse(dyad_mids$rolea == 1 | dyad_mids$rolea == 2, 1, 0)
dyad_mids$joiner <- ifelse(dyad_mids$rolea == 2 | dyad_mids$roleb == 2 | dyad_mids$rolea == 4 | dyad_mids$roleb == 4, 1, 0)

neg$targeta <- rbinom(125000, size = 1, prob=.5)
neg$initiatora <- ifelse(neg$targeta == 1, 0, 1)
neg$joiner <- rbinom(125000, size = 1, prob=sum(dyad_mids$joiner) / length(dyad_mids$joiner))
```

```{r merge-locations}

#Merge dyadic-MIDs data with MIDs location data
mid_loc <- left_join(dyad_mids, locations, by = c("disno" = "dispnum"))
mid_loc$year <- mid_loc$year.x

mid_loc <- subset(mid_loc, !is.na(mid_loc$midloc2_xlongitude))
mid_loc$lat.x <- mid_loc$midloc2_ylatitude
mid_loc$long.x <- mid_loc$midloc2_xlongitude
```

```{r add-nmc-polity}

#Include data for CINC and regime type
add_nmc <- function(mid_loc, nmc_polity)
{
  nmc_subset <- data.frame(nmc_polity$ccode, nmc_polity$year, nmc_polity$cinc, nmc_polity$democ, nmc_polity$autoc)
  names(nmc_subset) <- c("ccode", "year", "cinc", "democ", "autoc")
  mid_loc <- merge(mid_loc, nmc_subset, by.x = c("statea", "year"), by.y = c("ccode", "year"))
  mid_loc <- merge(mid_loc, nmc_subset, by.x = c("stateb", "year"), by.y = c("ccode", "year"), suffixes = c("_a", "_b"))
  return(mid_loc)
}

demaut <- function(dem, aut)
{
  return((dem - aut + 10) / 2)
}

neg$statea <- neg$ccode1
neg$stateb <- neg$ccode2

mid_loc <- add_nmc(mid_loc, nmc_polity)
neg <- add_nmc(neg, nmc_polity)

mid_loc$demauta <- demaut(mid_loc$democ_a, mid_loc$autoc_a)
mid_loc$demautb <- demaut(mid_loc$democ_b, mid_loc$autoc_b)
neg$demauta <- demaut(neg$democ_a, neg$autoc_a)
neg$demautb <- demaut(neg$democ_b, neg$autoc_b)
```

```{r add-capitals}

#Include capital location
add_cap <- function(mid_loc, caploc)
{
  mid_loc <- merge(mid_loc, caploc, by.x = "statea", by.y = "ccode", all.x = TRUE)
  mid_loc <- merge(mid_loc, caploc, by.x = "stateb", by.y = "ccode", suffixes = c("_a", "_b"), all.x = TRUE)
}

mid_loc <- add_cap(mid_loc, caploc)
neg <- add_cap(neg, caploc)
```

```{r add-contig}

#Include contiguity data
mid_loc <- merge(mid_loc, contig, by.x = c("statea", "stateb", "year"), by.y = c("state1no", "state2no", "year"), all.x = TRUE)
mid_loc$contig <- if_else(is.na(mid_loc$dyad.y), 0, 1)

neg <- merge(neg, contig, by.x = c("statea", "stateb", "year"), by.y = c("state1no", "state2no", "year"), all.x = TRUE)
neg$contig <- if_else(is.na(neg$dyad), 0, 1)
```


```{r add-alliances}

#Include alliance data
alliances$ally <- 1
alliances <- data.frame(alliances$ccode1, alliances$ccode2, alliances$year, alliances$ally)
names(alliances) <- c("statea", "stateb", "year", "ally")
alliances <- distinct(alliances)

mid_loc <- merge(mid_loc, alliances, by = c("statea", "stateb", "year"), all.x = TRUE)
neg <- merge(neg, alliances, by = c("statea", "stateb", "year"), all.x = TRUE)

mid_loc$ally<- if_else(is.na(mid_loc$ally), 0, 1)
neg$ally<- if_else(is.na(neg$ally), 0, 1)
```

```{r pol-rev}

#Add political relevance data
mid_loc$polrev <- ifelse( mid_loc$contig == 1 | mid_loc$maj1 == 1 | mid_loc$maj2 == 1,1,0)
neg$polrev <- ifelse(neg$contig == 1 | neg$maj1 == 1 | neg$maj2 == 1, 1, 0)

target_neg <- subset(neg, neg$targeta == 0)
target_mids <- subset(mid_loc, mid_loc$targeta == 0)
```





```{r, set-fatalities}
mid_loc$fatlev <- ifelse(mid_loc$fatlev != c("Missing (knon fatalities","None"),1,0)

neg$fatlev <- 0
```


```{r reduce-vars}

#Create function to reduce DF
reduce_data_frame <- function(frame)
{
  frame <- data.frame(year = frame$year, statea = frame$statea, stateb = frame$stateb, contig = frame$contig, cap_1 = frame$cinc_a, cap_2 = frame$cinc_b, majpow1 = frame$maj1, majpow2 = frame$maj2, caplong_a = frame$caplong_a, caplat_a = frame$caplat_a, caplong_b = frame$caplong_b, caplat_b = frame$caplat_b, mid_lat = frame$lat.x, mid_long = frame$long.x, demauta = frame$demauta, demautb = frame$demautb, ally = frame$ally, 
                    initiator = frame$initiatora, fatal = frame$fatlev, hihost = frame$hihost, polrev = frame$polrev)
  return(frame)
}

neg$hihost <- 0

dat_pos <- reduce_data_frame(mid_loc)
dat_neg <- reduce_data_frame(neg)
dat_neg <- head(dat_neg, 1000000)

onsetinit <- function(pos_frame, neg_frame)
{
  pos_frame$onsetdum <- 1
  pos_frame$initdum <- ifelse(pos_frame$initiator == 1, 1, 0)
  
  neg_frame$onsetdum <- 0
  neg_frame$initdum <- 0
  
  pos_frame$fatalons <- ifelse(pos_frame$fatal == 1, 1, 0)
  neg_frame$fatalons <- 0
  
  frame <- rbind(pos_frame, neg_frame)
  return(frame)
}

dat <- onsetinit(dat_pos, dat_neg)
```








```{r calc-vars}

#Calculate proximate power variables
calc_vars <- function(dat)
{
  dat$caplong_a <- round(dat$caplong_a/.5)*.5
  dat$caplong_b <- round(dat$caplong_b/.5)*.5
  dat$caplat_a <- round(dat$caplat_a/.5)*.5
  dat$caplat_b <- round(dat$caplat_b/.5)*.5
  
  dat$mid_long <- round(dat$mid_long/.5)*.5
  dat$mid_lat <- round(dat$mid_lat/.5)*.5
  
  cap_coord_a <- as.matrix(data.frame(dat$caplong_a, dat$caplat_a))
  cap_coord_b <- as.matrix(data.frame(dat$caplong_b, dat$caplat_b))
  mid_coord <- as.matrix(data.frame(dat$mid_long, dat$mid_lat))

  dat$distancea <- distHaversine(mid_coord, cap_coord_a) / 1000 / 100
  dat$distanceb <- distHaversine(mid_coord, cap_coord_b) / 1000 / 100
  
  #########Minimum distance of 1 to avoid division by 0##########
  dat$distancea <- ifelse(dat$distancea < 1, 1, dat$distancea)
  dat$distanceb <- ifelse(dat$distanceb < 1, 1, dat$distanceb)
  dat$distance <- distHaversine(cap_coord_a, cap_coord_b) / 1000 / 100
  dat$distance <- ifelse(dat$distance < 1, 1, dat$distance)
  
  #Nominal
  dat$abscapdiff <- abs(dat$cap_1 - dat$cap_2)
  dat$caprat <- dat$cap_1 / (dat$cap_1 + dat$cap_2)

  
  #distance discounted (CINC/RAW DISTANCE)
    dat$cincdisa <- dat$cap_1 / dat$distancea
  dat$cincdisb <- dat$cap_2 / dat$distanceb
  dat$abseffcapdiff <- abs(dat$cincdisa - dat$cincdisb)
  dat$effcaprat <- dat$cincdisa / (dat$cincdisa + dat$cincdisb)
  
  #distance discounted (CINC/LOG DISTANCE)
    #########Minimum distance of e^1 for logged distance##########
  
   dat$log_distancea <- ifelse(dat$distancea < exp(1), exp(1), dat$distancea)
 dat$log_distanceb <- ifelse(dat$distanceb < exp(1), exp(1), dat$distanceb)
 dat$log_distance <- ifelse(dat$distance < exp(1), exp(1), dat$distance)
 
 dat$log_distancea <-  log(dat$log_distancea)
dat$log_distanceb <- log(dat$log_distanceb)
dat$log_distance <-  log(dat$log_distance)
  
    dat$log_cincdisa <- dat$cap_1 / dat$log_distancea
  dat$log_cincdisb <- dat$cap_2 / dat$log_distanceb
  dat$abseffcapdiff_log <- abs(dat$log_cincdisa - dat$log_cincdisb)
  dat$effcaprat_Log <- dat$log_cincdisa / (dat$log_cincdisa + dat$log_cincdisb)
  
  
    #distance discounted (CINC/DISTANCE^2)
  
   dat$log_distancea <- ifelse(dat$distancea < exp(1), exp(1), dat$distancea)
 dat$log_distanceb <- ifelse(dat$distanceb < exp(1), exp(1), dat$distanceb)
 dat$log_distance <- ifelse(dat$distance < exp(1), exp(1), dat$distance)
 
 dat$distancea2 <-  dat$distancea*dat$distancea
dat$distanceb2 <- dat$distanceb*dat$distanceb
dat$distance2 <-  dat$distance*dat$distance
  
    dat$cincdisa2 <- dat$cap_1 / dat$distancea2
  dat$cincdisb2 <- dat$cap_2 / dat$distanceb2
  dat$abseffcapdiff_2 <- abs(dat$cincdisa2 - dat$cincdisb2)
  dat$effcaprat_2 <- dat$log_cincdisa / (dat$log_cincdisa + dat$log_cincdisb)


   #distance discounted BDM LSG Calculations
  dat$miles_per_day <- 4.0233 #BDM's 250 miles converted to 100's of km (used here)
  dat$miles_per_day <- ifelse(dat$year > 1918,6.0350,dat$miles_per_day) #BDM's 375 miles converted to 100's of km (used here)
  dat$miles_per_day <- ifelse(dat$year > 1945,8.0467,dat$miles_per_day) #BDM's 500 miles converted to 100's of km (used here)
  dat$BDM_cap_1 <- dat$cap_1^log10((dat$distancea/dat$miles_per_day)+(10-2.718281828))
    dat$BDM_cap_1 <- ifelse(dat$BDM_cap_1 > dat$cap_1, dat$cap_1, dat$BDM_cap_1)
  dat$BDM_cap_2 <- dat$cap_2^log10((dat$distanceb/dat$miles_per_day)+(10-2.718281828))
  dat$BDM_cap_2 <- ifelse(dat$BDM_cap_2 > dat$cap_2, dat$cap_2, dat$BDM_cap_2)
  dat$abseffcapdiff_BDM <- abs(dat$BDM_cap_1 - dat$BDM_cap_2)
  dat$effcaprat_BDM <- dat$BDM_cap_1 / (dat$BDM_cap_1 + dat$BDM_cap_2)
  
  dat$demautinter <- dat$demauta * dat$demautb
  dat$caprat2 <- dat$caprat * dat$caprat
 
  dat$dyad <- ifelse(dat$statea > dat$stateb, paste0(dat$statea, dat$stateb), paste0(dat$stateb, dat$statea))

  dat$mid_lat <- round(dat$mid_lat/0.5)*0.5
  dat$mid_long <- round(dat$mid_long/0.5)*0.5
  return(dat)
}

dat <- calc_vars(dat)
```



```{r}

#Create separate DF with distance-from-nearest-border calculations
dat_w_border_distance <- dat
dat_w_border_distance <- dat_w_border_distance[c(1:3,13:14)]
```



```{r eval=FALSE, message=FALSE, warning=FALSE, include=FALSE}

#####Closet border distance takes a very long time to calculate, due to the 1M non-events. Here is the code that measures the distance. Note it takes, for example, around 12 hours to run on a Macbook Pro with M3pro chip. Output DF (simply including the closest-border-to-dispute distance) is provided for convenience (so that this chuck does not need to be run each time).



####Calculate Closest Border Distance
library(cshapes)
library(geosphere)
library(ggplot2)
library(magrittr)
library(sf)
library(sp)
library(maptools)


#download CShapes2 data
CSHAPES2 <- cshp(date = NA, useGW = FALSE, dependencies = FALSE)

#####reduce polygon complexity
CSHAPES2 <- rmapshaper::ms_simplify(CSHAPES2, keep_shapes=T, keep=0.01,
                                   method = "dp",  snap=T)

library(dplyr)

#####FIRST, WE ARE FIGURING OUT THE APPROPRIATE POLYGON FOR EACH STATE GIVEN THE DATE. HERE, WE ARE MATCHING IT TO THE INDIVIDUAL FID's USED BY CSHAPES. THIS WILL LATER TELL US WHICH POLYGON TO CALL.
dat_w_border_distance$geometry_FID_stateA <- NA
dat_w_border_distance$potential_FIDs_A <- NA
dat_w_border_distance$Matched_COW_A <- NA

for (i in 1:
       nrow(dat_w_border_distance)
     #1
     ){
  CSHAPES2_filtered <- CSHAPES2
  CSHAPES2_filtered$after <- NA
   CSHAPES2_filtered$before <- NA
    CSHAPES2_filtered$COWcode <- NA
    CSHAPES2_filtered$after <- CSHAPES2_filtered$start <= as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d")
    CSHAPES2_filtered$before <- CSHAPES2_filtered$end >= as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d")
    CSHAPES2_filtered$COWcode <- CSHAPES2_filtered$cowcode == dat_w_border_distance$statea[i]
    CSHAPES2_filtered <- subset(CSHAPES2_filtered,CSHAPES2_filtered$after == T & CSHAPES2_filtered$before == T  & CSHAPES2_filtered$COWcode == T)
   
    dat_w_border_distance$geometry_FID_stateA[i] <- CSHAPES2_filtered$fid[1]
   dat_w_border_distance$potential_FIDs_A[i] <- nrow(CSHAPES2_filtered)
   dat_w_border_distance$Matched_COW_A[i] <- CSHAPES2_filtered$cowcode[1]
   
}


dat_w_border_distance$geometry_FID_stateB <- NA
dat_w_border_distance$potential_FIDs_B <- NA
dat_w_border_distance$Matched_COW_B <- NA
for (i in 1:nrow(dat_w_border_distance)) {
  CSHAPES2_filtered <- CSHAPES2
  CSHAPES2_filtered$after <- NA
   CSHAPES2_filtered$before <- NA
    CSHAPES2_filtered$COWcode <- NA
    CSHAPES2_filtered$after <- CSHAPES2_filtered$start <= as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d")
    CSHAPES2_filtered$before <- CSHAPES2_filtered$end >= as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d")
    CSHAPES2_filtered$COWcode <- CSHAPES2_filtered$cowcode == dat_w_border_distance$stateb[i]
    CSHAPES2_filtered <- subset(CSHAPES2_filtered,CSHAPES2_filtered$after == T & CSHAPES2_filtered$before == T  & CSHAPES2_filtered$COWcode == T)
   
    dat_w_border_distance$geometry_FID_stateB[i] <- CSHAPES2_filtered$fid[1]
     dat_w_border_distance$potential_FIDs_B[i] <- nrow(CSHAPES2_filtered)
    dat_w_border_distance$Matched_COW_B[i] <- CSHAPES2_filtered$cowcode[1]
}

###NOW WE HAVE THE APPROPRIATE FID's FOR STATE A AND B FOR EACH DYAD. NOW WE NEED TO CALL THE APPROPRIATE POLYGONS AND CALCULATE DISTANCE

####DROP NA's
dat_w_border_distance <- subset(dat_w_border_distance,!is.na(dat_w_border_distance$geometry_FID_stateA))
dat_w_border_distance <- subset(dat_w_border_distance,!is.na(dat_w_border_distance$geometry_FID_stateB))


dat_w_border_distance$state_A_dist_from_border <- NA
dat_w_border_distance$state_A_closest_lon <- NA
dat_w_border_distance$state_A_closest_lat <- NA
dat_w_border_distance$state_A_cap_lon <- NA
dat_w_border_distance$state_A_cap_lat <- NA

for (i in #4:5
     1:nrow(dat_w_border_distance)
     ){
   
    STATE_A <- CSHAPES2[CSHAPES2$fid == dat_w_border_distance$geometry_FID_stateA[i],]
    
    STATE_A <- subset(STATE_A,!is.na(STATE_A$caplong))
  pts <- data.frame(t(sapply(list(dat_w_border_distance$mid_long[i],dat_w_border_distance$mid_lat[i]),c)))
spdf <- as_Spatial(STATE_A)
wrld_subset <-spdf
dist.mat <- geosphere::dist2Line(p = pts, line = wrld_subset)


pts.wit.dist <- cbind(pts, dist.mat)
DF <- as.data.frame(pts.wit.dist )
DF <- DF[c(1:5)]



pts.sp <- sp::SpatialPoints(coords      = pts[,c("X1","X2")], # order matters
                            proj4string = wrld_subset@proj4string)


####only keep those OUTSIDE borders (If event is inside a state's border, then distance will be 0)
pts.spdf <-as(pts.sp,"SpatialPointsDataFrame")
pts.spdf <- pts.spdf[wrld_subset,]  #only those inside dataframe NEED to figure out how to do opposite!
DF_Inside <- as.data.frame(pts.spdf)
DF <- merge(DF,DF_Inside,by = c("X1","X2"),all = T)
DF$distance_from_closest_border <- ifelse(is.na(DF$dummy),DF$distance,0)

dat_w_border_distance$state_A_dist_from_border[i] <- DF$distance_from_closest_border[1]
dat_w_border_distance$state_A_closest_lon[i] <- DF$lon[1]
dat_w_border_distance$state_A_closest_lat[i] <- DF$lat[1]
dat_w_border_distance$state_A_cap_lon[i] <- STATE_A$caplong[1]
dat_w_border_distance$state_A_cap_lat[i] <- STATE_A$caplat[1]

}
#convert meters to KM
dat_w_border_distance$state_A_dist_from_border <- dat_w_border_distance$state_A_dist_from_border/1000/100




dat_w_border_distance$state_B_dist_from_border <- NA
dat_w_border_distance$state_B_closest_lon <- NA
dat_w_border_distance$state_B_closest_lat <- NA
dat_w_border_distance$state_B_cap_lon <- NA
dat_w_border_distance$state_B_cap_lat <- NA
for (i in 1:nrow(dat_w_border_distance)
     ) {
    STATE_B <- CSHAPES2[CSHAPES2$fid == dat_w_border_distance$geometry_FID_stateB[i],]
    STATE_B <- subset(STATE_B,!is.na(STATE_B$caplong))
  pts <- data.frame(t(sapply(list(dat_w_border_distance$mid_long[i],dat_w_border_distance$mid_lat[i]),c)))
spdf <- as_Spatial(STATE_B)
wrld_subset <-spdf
dist.mat <- geosphere::dist2Line(p = pts, line = wrld_subset)



pts.wit.dist <- cbind(pts, dist.mat)
DF <- as.data.frame(pts.wit.dist )
DF <- DF[c(1:5)]

pts.sp <- sp::SpatialPoints(coords      = pts[,c("X1","X2")], # order matters
                            proj4string = wrld_subset@proj4string)

####only keep those OUTSIDE borders
pts.spdf <-as(pts.sp,"SpatialPointsDataFrame")
pts.spdf <- pts.spdf[wrld_subset,]  #only those inside dataframe NEED to figure out how to do opposite!
DF_Inside <- as.data.frame(pts.spdf)
DF <- merge(DF,DF_Inside,by = c("X1","X2"),all = T)
DF$distance_from_closest_border <- ifelse(is.na(DF$dummy),DF$distance,0)

dat_w_border_distance$state_B_dist_from_border[i] <- DF$distance_from_closest_border[1]
dat_w_border_distance$state_B_closest_lon[i] <- DF$lon[1]
dat_w_border_distance$state_B_closest_lat[i] <- DF$lat[1]
dat_w_border_distance$state_B_cap_lon[i] <- STATE_B$caplong[1]
dat_w_border_distance$state_B_cap_lat[i] <- STATE_B$caplat[1]
}
#convert metters to KM
dat_w_border_distance$state_B_dist_from_border <- dat_w_border_distance$state_B_dist_from_border/1000/100



```

```{r}

###Use this to output the DF with closet-border-to-dispute distance

#write.csv(dat_w_border_distance, "dat_w_border_distance_partI.csv", row.names=FALSE)
```


```{r}
#Upload closest-border-to-dispute data


dat_w_border_distance <- read_csv("../Data/dat_w_border_distance_partI.csv")

```





```{r}
####Calculating Nearest Border Distance

dat_check <- merge(dat,dat_w_border_distance, by = c("year","statea","stateb","mid_lat","mid_long"))


dat_check <- dat_check[c("year","statea","Matched_COW_A","stateb","Matched_COW_B","caplong_a","state_A_cap_lon","caplong_b","state_B_cap_lon","caplat_a","state_A_cap_lat","caplat_b","state_B_cap_lat","distancea","distanceb","state_A_dist_from_border","state_B_dist_from_border","mid_long","mid_lat")]

cap_coord_a <- as.matrix(data.frame(dat_check$state_A_cap_lon, dat_check$state_A_cap_lat))
cap_coord_b <- as.matrix(data.frame(dat_check$state_B_cap_lon, dat_check$state_B_cap_lat))

 mid_coord <- as.matrix(data.frame(dat_check$mid_long, dat_check$mid_lat))

 ###minimum distance = 1
  dat_check$state_A_dist_from_border <- ifelse(dat_check$state_A_dist_from_border < 1, 1, dat_check$state_A_dist_from_border)
  dat_check$state_B_dist_from_border <- ifelse(dat_check$state_B_dist_from_border < 1, 1, dat_check$state_B_dist_from_border)
  
    dat_check$distancea_cap <- distHaversine(mid_coord, cap_coord_a) / 1000 / 100
  dat_check$distanceb_cap<- distHaversine(mid_coord, cap_coord_b) / 1000 / 100
   dat_check$distancea_cap <- ifelse(dat_check$distancea_cap < 1, 1, dat_check$distancea_cap)
  dat_check$distanceb_cap <- ifelse(dat_check$distanceb_cap < 1, 1, dat_check$distanceb_cap)
  
  ####Log adjust (minimum distance for log is e 100's km))
 dat_check$state_A_dist_from_border_log <-dat_check$state_A_dist_from_border
   dat_check$state_B_dist_from_border_log <- dat_check$state_B_dist_from_border
   
  dat_check$state_A_dist_from_border_log <- ifelse(dat_check$state_A_dist_from_border_log < exp(1), exp(1), dat_check$state_A_dist_from_border_log)
  
 dat_check$state_B_dist_from_border_log <- ifelse(dat_check$state_B_dist_from_border_log < exp(1), exp(1), dat_check$state_B_dist_from_border_log)
 
   dat_check$state_A_dist_from_border_log <- log(dat_check$state_A_dist_from_border_log)
  dat_check$state_B_dist_from_border_log <- log(dat_check$state_B_dist_from_border_log)
  
  
  
   dat_check$distancea_cap_log <-dat_check$distancea_cap
   dat_check$distanceb_cap_log <- dat_check$distanceb_cap

   dat_check$distancea_cap_log <- ifelse(dat_check$distancea_cap_log < exp(1), exp(1), dat_check$distancea_cap_log)
 dat_check$distanceb_cap_log <- ifelse(dat_check$distanceb_cap_log < exp(1), exp(1), dat_check$distanceb_cap_log)
 
  dat_check$distancea_cap_log <- log(dat_check$distancea_cap_log)
  dat_check$distanceb_cap_log <- log(dat_check$distanceb_cap_log)
  
  dat_check <- dat_check[c("year","statea","Matched_COW_A","stateb","Matched_COW_B","caplong_a","state_A_cap_lon","caplong_b","state_B_cap_lon","caplat_a","state_A_cap_lat","caplat_b","state_B_cap_lat","distancea","distancea_cap","distanceb","distanceb_cap","state_A_dist_from_border","state_B_dist_from_border","mid_long","mid_lat","state_A_dist_from_border_log","state_B_dist_from_border_log")]


   
dat_check$cap_diff_a <- dat_check$distancea - dat_check$distancea_cap
dat_check$cap_diff_b <- dat_check$distanceb - dat_check$distanceb_cap
dat_check <- subset(dat_check, dat_check$cap_diff_b > -10.770896 & dat_check$cap_diff_b < 9.545226  )

  
```




```{r}
#Create Figure 4 (Example of Non-Event)



i <- 8733
library("cshapes")
library(geosphere)
CSHAPES2 <- cshp(date = as.Date(paste0(dat_w_border_distance$year[i],"-01-01"), format = "%Y-%m-%d"), useGW = FALSE, dependencies = FALSE)
# Extract Switzerland 
CSHAPES2 <- rmapshaper::ms_simplify(CSHAPES2, keep_shapes=T, keep=1,
                                   method = "dp",  snap=T)

STATE_A <- CSHAPES2[CSHAPES2$cowcode==dat_w_border_distance$statea[i],] 
STATE_A <- subset(STATE_A, !is.na(STATE_A$cowcode))

library(sf)
  spdf <- as_Spatial(STATE_A)

library(sp)
library(geosphere)
library(maptools)
wrld_subset <-spdf

pts <- data.frame(t(sapply(list(dat_w_border_distance$mid_long[i],dat_w_border_distance$mid_lat[i]),c)))
spdf <- as_Spatial(STATE_A)
wrld_subset <-spdf
dist.mat <- geosphere::dist2Line(p = pts, line = wrld_subset)

pts.wit.dist_A <- cbind(pts, dist.mat)
DF <- as.data.frame(pts.wit.dist_A )
DF <- DF[c(1:5)]
pts.wit.dist_A[1:3,]

pts.sp_A <- sp::SpatialPoints(coords= pts[,c("X1","X2")], # order matters
                            proj4string = wrld_subset@proj4string)

cap.pts.A <- data.frame(t(sapply(list(STATE_A$caplong[1],STATE_A$caplat[1]),c)))
pts.cap_A <- sp::SpatialPoints(coords= cap.pts.A[,c("X1","X2")], # order matters
                            proj4string = wrld_subset@proj4string)







STATE_B <- CSHAPES2[CSHAPES2$cowcode==dat_w_border_distance$stateb[i],] 
STATE_B <- subset(STATE_B, !is.na(STATE_B$cowcode))

library(sf)
  spdf <- as_Spatial(STATE_B)

library(sp)
library(geosphere)
library(maptools)
wrld_subset <-spdf

pts <- data.frame(t(sapply(list(dat_w_border_distance$mid_long[i],dat_w_border_distance$mid_lat[i]),c)))
spdf <- as_Spatial(STATE_B)
wrld_subset <-spdf
dist.mat <- geosphere::dist2Line(p = pts, line = wrld_subset)

pts.wit.dist_B <- cbind(pts, dist.mat)
DF <- as.data.frame(pts.wit.dist_B )
DF <- DF[c(1:5)]
pts.wit.dist_B[1:3,]

pts.sp_B <- sp::SpatialPoints(coords= pts[,c("X1","X2")], # order matters
                            proj4string = wrld_subset@proj4string)



cap.pts.B <- data.frame(t(sapply(list(STATE_B$caplong[1],STATE_B$caplat[1]),c)))
pts.cap_B <- sp::SpatialPoints(coords= cap.pts.B[,c("X1","X2")], # order matters
                            proj4string = wrld_subset@proj4string)








STATE_A_and_B <- CSHAPES2[CSHAPES2$cowcode==dat_w_border_distance$statea[i] | CSHAPES2$cowcode==dat_w_border_distance$stateb[i],] 

STATE_A_and_B <- subset(STATE_A_and_B, !is.na(STATE_A_and_B$cowcode))

library(sf)
#if (require(sp, quietly = TRUE)) {
  # convert to SpatialPolygonsDataFrame
  spdf <- as_Spatial(STATE_A_and_B)
  # identical to
  #spdf <- as(COUNTRY, "Spatial")
  # convert to SpatialPolygons
 # as(st_geometry(COUNTRY), "Spatial")
  # back to sf
#  as(spdf, "sf")
#}


library(sp)
library(geosphere)
library(maptools)
# some country polygons to try on
#data(wrld_simpl, package = "maptools")
#wrld_subset <- wrld_simpl[wrld_simpl@data$ISO2 %in% c("DE"),]

wrld_subset <-spdf
pdf("Figure_4.pdf", width = 8, height = 6)

# Load required packages
library(sp)
library(geosphere)
library(rnaturalearth)
library(rnaturalearthdata)

# ------------------------------------------------------------
# (Assuming that you already have your "wrld_subset" object (your states)
#  and the capital point objects "pts.cap_A" and "pts.cap_B" defined.)
# For example, your states and capitals might have been created earlier:
#   - wrld_subset: SpatialPolygonsDataFrame of your states of interest
#   - pts.cap_A and pts.cap_B: SpatialPoints for each state's capital
# ------------------------------------------------------------

# Retrieve Greenland's polygon (as a Spatial object)
greenland <- ne_countries(scale = "medium", 
                          country = "Greenland", 
                          returnclass = "sp")

# Set up the plot WITHOUT an overall title.
plot(wrld_subset, 
     col = "grey95",      # very light grey fill for states
     border = "grey40")   # darker grey for borders

# Add Greenland's polygon to the map in grey scale.
plot(greenland, 
     col = "grey80",      # medium grey fill for Greenland
     border = "grey50",   # slightly darker grey border
     add = TRUE)

# Plot the state capitals in black.
points(pts.cap_A, 
       col = "black", 
       pch = 16,      # solid circle
       cex = 1.2)
points(pts.cap_B, 
       col = "black", 
       pch = 16, 
       cex = 1.2)

# Define the mid point (the point near Iceland, here chosen as (-40, 65))
mid_coords <- c(-40, 65)
mid_point <- SpatialPoints(coords = matrix(mid_coords, nrow = 1),
                             proj4string = wrld_subset@proj4string)
# Plot the mid point (using an open circle so it stands out)
points(mid_point, 
       col = "black", 
       pch = 1,        
       cex = 1.5)

# Extract coordinates from the capitals so we can draw arrows
cap_coords_A <- coordinates(pts.cap_A)
cap_coords_B <- coordinates(pts.cap_B)

# Draw an arrow from State A's capital to the mid point.
arrows(x0 = cap_coords_A[1, 1],
       y0 = cap_coords_A[1, 2],
       x1 = mid_coords[1],
       y1 = mid_coords[2],
       length = 0.1,
       col = "black",
       lwd = 2)

# Draw an arrow from State B's capital to the mid point.
arrows(x0 = cap_coords_B[1, 1],
       y0 = cap_coords_B[1, 2],
       x1 = mid_coords[1],
       y1 = mid_coords[2],
       length = 0.1,
       col = "black",
       lwd = 2)

# -----------------------------
# Compute midpoints and arrow angles for labeling

# For arrow A:
x_mid_A <- (cap_coords_A[1, 1] + mid_coords[1]) / 2
y_mid_A <- (cap_coords_A[1, 2] + mid_coords[2]) / 2
dx_A <- mid_coords[1] - cap_coords_A[1,1]
dy_A <- mid_coords[2] - cap_coords_A[1,2]
angle_A <- 42.6
len_A <- sqrt(dx_A^2 + dy_A^2)
offset_A <- len_A * 0.05  # offset is 5% of the arrow length
# Compute perpendicular offset components (rotate (dx,dy) by 90°)
offset_x_A <- -dy_A / len_A * offset_A
offset_y_A <- dx_A / len_A * offset_A

# For arrow B:
x_mid_B <- (cap_coords_B[1, 1] + mid_coords[1]) / 2
y_mid_B <- (cap_coords_B[1, 2] + mid_coords[2]) / 2
dx_B <- mid_coords[1] - cap_coords_B[1,1]
dy_B <- mid_coords[2] - cap_coords_B[1,2]
angle_B <- -13
len_B <- sqrt(dx_B^2 + dy_B^2)
offset_B <- len_B * -0.03  # offset is 3% of the arrow length
offset_x_B <- -dy_B / len_B * offset_B
offset_y_B <- dx_B / len_B * offset_B

# -----------------------------
# Add rotated text labels at the offset positions.
text(x_mid_A + offset_x_A, y_mid_A + offset_y_A, 
     "U.S. to conflict distance", 
     srt = angle_A, cex = 0.8, col = "black", xpd = TRUE)

text(x_mid_B + offset_x_B, y_mid_B + offset_y_B, 
     "Denmark to conflict distance", 
     srt = angle_B, cex = 0.8, col = "black", xpd = TRUE)

# Close the PDF device to finalize the file
dev.off()

```



```{r}
dat_check <- merge(dat_check,dat, by = c("year","statea","stateb","mid_lat","mid_long"))


#Non distance adjusted
  dat_check$capdiff <- dat_check$cap_1 - dat_check$cap_2
  
#Distance Adjusted Power

##Raw
  dat_check$cincdisa_cap <- dat_check$cap_1 / dat_check$distancea_cap
  dat_check$cincdisb_cap <- dat_check$cap_2 / dat_check$distanceb_cap

  dat_check$abscincdisdiff_cap <- abs(dat_check$cincdisa_cap - dat_check$cincdisb_cap)
  dat_check$effcaprat_cap <- dat_check$cincdisa_cap / (dat_check$cincdisa_cap + dat_check$cincdisb_cap)

  
  ##Raw (from border)
  dat_check$cincdisa_border <- dat_check$cap_1 / dat_check$state_A_dist_from_border
  dat_check$cincdisb_border  <- dat_check$cap_2 / dat_check$state_B_dist_from_border

  dat_check$abscincdisdiff_border <- abs(dat_check$cincdisa_border - dat_check$cincdisb_border)
  dat_check$effcaprat_border <- dat_check$cincdisa_border / (dat_check$cincdisa_border + dat_check$cincdisb_border)

  
  #logged (from border)
 
  
   dat_check$log_cincdisa_border <- dat_check$cap_1 / dat_check$state_A_dist_from_border_log
  dat_check$log_cincdisb_border <- dat_check$cap_2 / dat_check$state_B_dist_from_border_log
  dat_check$abseffcapdiff_log_border <- abs(dat_check$log_cincdisa_border - dat_check$log_cincdisb_border)
  dat_check$effcaprat_Log_border <- dat_check$log_cincdisa_border / (dat_check$log_cincdisa_border + dat_check$log_cincdisb_border)
  
  
  
  
  
  
  ###BDM
    dat_check$miles_per_day <- 4.0233 #BDM's 250 miles converted to 100's of km (used here)
  dat_check$miles_per_day <- ifelse(dat_check$year > 1918,6.0350,dat_check$miles_per_day) #BDM's 375 miles converted to 100's of km (used here)
  dat_check$miles_per_day <- ifelse(dat_check$year > 1945,8.0467,dat_check$miles_per_day) #BDM's 500 miles converted to 100's of km (used here)
  dat_check$BDM_cap_1_border <- dat_check$cap_1^log10((dat_check$state_A_dist_from_border/dat_check$miles_per_day)+(10-2.718281828))
    dat_check$BDM_cap_1_border <- ifelse(dat_check$BDM_cap_1_border > dat_check$cap_1, dat_check$cap_1, dat_check$BDM_cap_1_border)
  dat_check$BDM_cap_2_border <- dat_check$cap_2^log10((dat_check$state_B_dist_from_border/dat_check$miles_per_day)+(10-2.718281828))
  dat_check$BDM_cap_2_border <- ifelse(dat_check$BDM_cap_2_border > dat_check$cap_2, dat_check$cap_2, dat_check$BDM_cap_2_border)
  dat_check$effcapdiff_BDM_border <- dat_check$BDM_cap_1_border - dat_check$BDM_cap_2_border
  dat_check$abseffcapdiff_BDM_border <- abs(dat_check$BDM_cap_1_border - dat_check$BDM_cap_2_border)
  dat_check$effcaprat_BDM_border <- dat_check$BDM_cap_1_border / (dat_check$BDM_cap_1_border + dat_check$BDM_cap_2_border)
  




dat2 <- dat_check


```








```{r}
#Sampling weights: calculate possibilities for full dataset 1815 to 2010

nyear <- max(dat$year) - min(dat$year)
#Directed dyads each year, 1816-2010, times PRIO grid squares (720x360)
poss <-436847904000
formatC(poss, format="e")

per_actsual <- length(dyad_mids$disno)/poss


#for intitation
per_sample <- length(subset(dat$initdum, dat$initdum == 1)) / length(dat$initdum)	
weight_pos <- per_actsual / per_sample	
weight_neg <- (1 - per_actsual) / (1 - per_sample)	


dat$weight_init[dat$initdum == 1] <- weight_pos
dat$weight_init[dat$initdum == 0] <- weight_neg






#weights for onset
per_sample <- length(subset(dat$onsetdum, dat$onsetdum == 1)) / length(dat$onsetdum)	
weight_pos <- per_actsual / per_sample	
weight_neg <- (1 - per_actsual) / (1 - per_sample)	

dat$weight_onset[dat$onsetdum == 1] <- weight_pos
dat$weight_onset[dat$onsetdum == 0] <- weight_neg

#weight fatal onset
per_sample <- length(subset(dat$fatalons, dat$fatalons == 1)) / length(dat$fatalons)	
weight_pos <- per_actsual / per_sample	
weight_neg <- (1 - per_actsual) / (1 - per_sample)	

dat$weight_fatalons[dat$fatalons == 1] <- weight_pos
dat$weight_fatalons[dat$fatalons == 0] <- weight_neg

```










```{r}

library(tidyr)
library(dplyr)
library(ggplot2)
library(scales)

# Reshape the data
dat_long <- dat %>% 
  pivot_longer(
    cols = c(caprat, effcaprat),
    names_to = "ratio_type",
    values_to = "ratio_value"
  ) %>% 
  mutate(
    onsetdum = factor(onsetdum, levels = c(0, 1), 
                      labels = c("All Potential Dyads", "Actual Uses of Force")),
    ratio_type = factor(ratio_type, levels = c("caprat", "effcaprat"), 
                        labels = c("Nominal Capability Ratio", "Effective Capability Ratio"))
  )


pdf("Figure_3.pdf", width = 8, height = 6)
# Density histogram with improved grey-scale aesthetics
ggplot(dat_long, aes(x = ratio_value)) +
  geom_histogram(
   binwidth = .01, 
    aes(y = after_stat(density * 1)), 
    fill = "grey90",    # Soft light grey fill for a refined look
    color = "grey50",   # Subtle grey border instead of harsh black
    size = 0.3          # Thin border lines for delicacy
  ) +
  geom_hline(yintercept = 1, color = "grey40", size = 0.8, linetype = "dashed") +
  facet_grid(onsetdum ~ ratio_type, scales = "free_x") +
  theme_bw() +
  scale_y_continuous(labels = function(x) paste0(x, "%")) +
  labs(x = "Capability Ratio", y = "Density (%)") +
  theme(
    text = element_text(family = "Times"),
    legend.position = "none",
    # Optional: adjust grid lines for a softer appearance
    panel.grid.major = element_line(color = "grey85"),
    panel.grid.minor = element_line(color = "grey90")
  )

dev.off()
```








```{r}

#Creat function to measure AIC and BIC for models

custom_gof_metrics <- function(model) {
  
fitted_values <- model$glm_res$fitted.values
# Get the actual response values
actual_values <- model$glm_res$y # Replace 'dat$initdum' with your actual response variable
# Calculate the log-likelihood
log_likelihood <- sum(dbinom(actual_values, size = 1, prob = fitted_values, log = TRUE))
  # Calculate the number of parameters
  num_parameters <- length(coef(model))
  # Calculate the number of observations
  num_obs <- length(model$glm_res$residuals)
  # Calculate the AIC
  aic <- -2 * log_likelihood + 2 * num_parameters
  # Calculate the BIC
  bic <- -2 * log_likelihood + num_parameters * log(num_obs)
  # Save the calculated goodness-of-fit statistics as true values in the model object
  model$logLik <- log_likelihood
  model$aic <- aic
  model$bic <- bic
  # Return the updated model object
  return(c("Observations" = trunc(num_obs), "AIC" = aic, "BIC" = bic, "LL" = log_likelihood))
}





adjusted_deviance <- function(model) {
  
fitted_values <- model$glm_res$fitted.values
# Get the actual response values
actual_values <- model$glm_res$y # Replace 'dat$initdum' with your actual response variable
# Calculate the log-likelihood
log_likelihood <- sum(dbinom(actual_values, size = 1, prob = fitted_values, log = TRUE))

  model$glm_res$deviance <- -2 *log_likelihood
 as.numeric(-2 *log_likelihood)
}





adjusted_AIC <- function(model) {
  
fitted_values <- model$glm_res$fitted.values
# Get the actual response values
actual_values <- model$glm_res$y # Replace 'dat$initdum' with your actual response variable
# Calculate the log-likelihood
log_likelihood <- sum(dbinom(actual_values, size = 1, prob = fitted_values, log = TRUE))
  # Calculate the number of parameters
  num_parameters <- length(coef(model))
  # Calculate the number of observations
  num_obs <- length(model$glm_res$residuals)
  # Calculate the AIC
  aic <- -2 * log_likelihood + 2 * num_parameters
  # Calculate the BIC
  bic <- -2 * log_likelihood + num_parameters * log(num_obs)
  # Save the calculated goodness-of-fit statistics as true values in the model object
  model$logLik <- log_likelihood
  model$aic <- aic
  model$bic <- bic
  # Return the updated model object
  return(as.numeric(aic))
  
}



```




```{r}
#Calculate weights for models utilizing nearest-border-to-dispute distance
#calculate possibilities for border dataset 1886 to 2010

nyear <- max(dat$year) - min(dat$year)

#Directed dyads each year 1886 to 2010, x PRIO Grid Squares (720 x 360)
poss <- 415686297600


formatC(poss, format="e")


#for intitation
per_sample <- length(subset(dat2$initdum, dat2$initdum == 1)) / length(dat2$initdum)	
weight_pos <- per_actsual / per_sample	
weight_neg <- (1 - per_actsual) / (1 - per_sample)	
dat2$weight_init[dat2$initdum == 1] <- weight_pos
dat2$weight_init[dat2$initdum == 0] <- weight_neg

#weights for onset
per_sample <- length(subset(dat2$onsetdum, dat2$onsetdum == 1)) / length(dat2$onsetdum)	
weight_pos <- per_actsual / per_sample	
weight_neg <- (1 - per_actsual) / (1 - per_sample)	

dat2$weight_onset[dat2$onsetdum == 1] <- weight_pos
dat2$weight_onset[dat2$onsetdum == 0] <- weight_neg








#weights for fatal
per_sample <- length(subset(dat2$fatalons, dat2$fatalons == 1)) / length(dat2$fatalons)	
weight_pos <- per_actsual / per_sample	
weight_neg <- (1 - per_actsual) / (1 - per_sample)	

dat2$weight_fatal[dat2$fatalons == 1] <- weight_pos
dat2$weight_fatal[dat2$fatalons == 0] <- weight_neg



```




```{r include=FALSE}
####Table 1 {Main Text}

#####To create Table A1 (All MIDs instead of uses of force only), change line 69 to "0" (i.e., dyad_mids <- subset(dyad_mids, dyad_mids$hihost > 0) ). Rerunning these models will give you Table A1.

library(miceadds)


#CINC
model1001.wg <- glm.cluster(onsetdum~ abscapdiff + cap_1 + cap_2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally  , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~  abseffcapdiff +cincdisa + cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)


#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~ abseffcapdiff_log + log_cincdisa + log_cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  abseffcapdiff_BDM + BDM_cap_1 + BDM_cap_2 +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)

texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3,  # State power variables
    13, 14, 15, 16, 17,  # CINC-based variables
    18, 19, 20, 21,  # More CINC-based variables
    4, 5, 6, 7, 8, 9, 10, 11, 12  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Difference", # abscapdiff
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Difference (Dist.)",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Difference (Log Dist.)",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
    "Power Difference (BDM LSG)", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_1.tex", 
  table = FALSE, 
  include.deviance = FALSE,

  # Use built-in significance markers
   stars = c(0.1, 0.05, 0.01, 0.001) 
)


```




```{r include=FALSE}
#Table A3{Online Appendix}

#Use of force Initiation


library(miceadds)



#CINC
model1001.wg <- glm.cluster(initdum~ abscapdiff + cap_1 + cap_2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally  , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_init)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


#CINC/ Raw Dist
model1002.wg <- glm.cluster(initdum ~  abseffcapdiff +cincdisa + cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_init)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)


#CINC/ Log Dist
model1003.wg <- glm.cluster(initdum ~ abseffcapdiff_log + log_cincdisa + log_cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_init)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

#BDM Depreciated CINC
model1004.wg <- glm.cluster(initdum ~  abseffcapdiff_BDM + BDM_cap_1 + BDM_cap_2 +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_init)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)

texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3,  # State power variables
    13, 14, 15, 16, 17,  # CINC-based variables
    18, 19, 20, 21,  # More CINC-based variables
    4, 5, 6, 7, 8, 9, 10, 11, 12  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Difference", # abscapdiff
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Difference (Dist.)",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Difference (Log Dist.)",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
    "Power Difference (BDM LSG)", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A3.tex", 
  table = FALSE, 
  include.deviance = FALSE,

  # Use built-in significance markers
   stars = c(0.1, 0.05, 0.01, 0.001) 
)


```

```{r}

#TABLE A13{Online Appendix}
####Selected on DV: Only include actual MIDs in dataset

#Make sure dataset includes ALL MIDs on line 91, above. (I.e., make sure it is set to "0", not "3")
dat_MIDS_UoF <- dat
dat_MIDS_UoF <- subset(dat_MIDS_UoF,dat_MIDS_UoF$hihost > 0)
dat_MIDS_UoF$onsetdum <- ifelse(dat_MIDS_UoF$hihost > 3,1,0)

####Table 1: Different Functional Forms
library(miceadds)



#CINC
model1001.wg <- glm.cluster(onsetdum~ abscapdiff + cap_1 + cap_2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally  , data=dat_MIDS_UoF, family = "binomial", cluster=dat_MIDS_UoF$dyad)
summary(model1001.wg)
#model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
#model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~  abseffcapdiff +cincdisa + cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat_MIDS_UoF, family = "binomial", cluster=dat_MIDS_UoF$dyad)
summary(model1002.wg)
#model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
#model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)


#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~ abseffcapdiff_log + log_cincdisa + log_cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat_MIDS_UoF, family = "binomial", cluster=dat_MIDS_UoF$dyad)
summary(model1003.wg)
#model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
#model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  abseffcapdiff_BDM + BDM_cap_1 + BDM_cap_2 +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat_MIDS_UoF, family = "binomial", cluster=dat_MIDS_UoF$dyad)
summary(model1004.wg)
#model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
#model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)

texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3,  # State power variables
    13, 14, 15, 16, 17,  # CINC-based variables
    18, 19, 20, 21,  # More CINC-based variables
    4, 5, 6, 7, 8, 9, 10, 11, 12  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Difference", # abscapdiff
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Difference (Dist.)",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Difference (Log Dist.)",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
    "Power Difference (BDM LSG)", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A13.tex", 
  table = FALSE, 
  include.deviance = FALSE,

  # Use built-in significance markers
   stars = c(0.05, 0.01, 0.001) 
)




```











```{r include=FALSE}
####Table A11{Online Appendix} 
####This includes an additional possible functional form: CINC/Dist^2
library(miceadds)



#CINC
model1001.wg <- glm.cluster(onsetdum~ abscapdiff + cap_1 + cap_2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally  , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~  abseffcapdiff +cincdisa + cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)


#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~ abseffcapdiff_log + log_cincdisa + log_cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  abseffcapdiff_BDM + BDM_cap_1 + BDM_cap_2 +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)

#CINC/ Dist^2
model1005.wg <- glm.cluster(onsetdum ~ abseffcapdiff_2 + cincdisa2 + cincdisb2 +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1005.wg)
model1005.wg$glm_res$deviance <- adjusted_deviance(model1005.wg)
model1005.wg$glm_res$aic <- adjusted_AIC(model1005.wg)


texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg, model1005.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG","CINC/Dist.^2"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3,  # State power variables
    13, 14, 15, 16, 17,  # CINC-based variables
    18, 19, 20, 21, 22, 23, 24, # More CINC-based variables
    4, 5, 6, 7, 8, 9, 10, 11, 12  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Difference", # abscapdiff
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Difference (Dist.)",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Difference (Log Dist.)",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
    "Power Difference (BDM LSG)", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG",     # log_cincdisb
    "Power Difference (Dist.^2)", # abseffcapdiff_BDM
    "CINC A/(Dist.^2)",           # BDM_cap_1
    "CINC B/(Dist.^2)"           # BDM_cap_2
  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A11.tex", 
  table = FALSE, 
  include.deviance = FALSE,
  # Use built-in significance markers
   stars = c(0.1, 0.05, 0.01, 0.001) 
)


```




```{r}
####Table A5{Online Appendix}
#Robustness check of Table 1, using closest-border-to-dispute distance instead of capital-to-dispute distance

#CINC
model1001.wg <- glm.cluster(onsetdum ~ abscapdiff + cap_1 + cap_2 +  distance  + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally  , data=dat2, family = "binomial", cluster=dat2$dyad, weights = dat2$weight_onset)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~  abscincdisdiff_border + cincdisa_border + cincdisb_border +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat2, family = "binomial", cluster=dat2$dyad, weights = dat2$weight_onset)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)




#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~  abseffcapdiff_log_border + log_cincdisa_border + log_cincdisb_border +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat2, family = "binomial", cluster=dat2$dyad, weights = dat2$weight_onset)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

#BDM Depreciated CINC

dat2$abseffcapdiff_BDM_border <- abs(dat2$effcapdiff_BDM_border)
model1004.wg <- glm.cluster(onsetdum ~ abseffcapdiff_BDM_border +  BDM_cap_1_border + BDM_cap_2_border +  distance + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat2, family = "binomial", cluster=dat2$dyad, weights = dat2$weight_onset)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)





texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3,  # State power variables
    13, 14, 15, 16, 17,  # CINC-based variables
    18, 19, 20, 21,  # More CINC-based variables
    4, 5, 6, 7, 8, 9, 10, 11, 12  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Difference", # abscapdiff
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Difference (Dist.)",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Difference (Log Dist.)",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
    "Power Difference (BDM LSG)", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A5.tex", 
  table = FALSE, 
  include.deviance = FALSE,
 stars = c(0.1, 0.05, 0.01, 0.001) 
)



```















```{r include=FALSE}
############Table 2:  Different Functional Forms


#####To create Table A2 (All MIDs instead of uses of force only), change line 69 to "0" (i.e., dyad_mids <- subset(dyad_mids, dyad_mids$hihost > 0) ). Rerunning these models will give you Table A1.

library(miceadds)

#CINC
model1001.wg <- glm.cluster(onsetdum ~ caprat + caprat2 + cap_1 + cap_2 +  distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


dat$effcaprat2 <- dat$effcaprat*dat$effcaprat
#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~ effcaprat + effcaprat2 + cincdisa + cincdisb +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)

dat$effcaprat_Log2 <- dat$effcaprat_Log*dat$effcaprat_Log
#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~  effcaprat_Log + effcaprat_Log2 + log_cincdisa + log_cincdisb +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

dat$effcaprat_BDM2 <- dat$effcaprat_BDM*dat$effcaprat_BDM
#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  effcaprat_BDM + effcaprat_BDM2 + BDM_cap_1 + BDM_cap_2 +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)






texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3, 4,   # State power variables
     14, 15, 16, 17, 18,19,  # CINC-based variables
    20, 21, 22, 23, 24,25,  # More CINC-based variables
    5, 6, 7, 8, 9, 10, 11, 12, 13  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Ratio", # caprat
           "Nominal Power Ratio^2", # caprat^2
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Ratio (Dist.)",            # abseffcapdiff
  "Power Ratio (Dist.)^2",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Ratio (Log Dist.)",   # abseffcapdiff_log
    "Power Ratio (Log Dist.)^2",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
   "Power Ratio (BDM LSG)", # abseffcapdiff_BDM
    "Power Ratio (BDM LSG)^2", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_2.tex", 
  table = FALSE, 
  include.deviance = FALSE
)


        
```


```{r}
####Table A12{Online Appendix} 
####This includes an additional possible functional form: CINC/Dist^2


dat$effcaprat_2_2 <- dat$effcaprat_2*dat$effcaprat_2
#BDM Depreciated CINC
model1005.wg <- glm.cluster(onsetdum ~  effcaprat_2 + effcaprat_2_2 + cincdisa2 + cincdisb2 +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1005.wg)
model1005.wg$glm_res$deviance <- adjusted_deviance(model1005.wg)
model1005.wg$glm_res$aic <- adjusted_AIC(model1005.wg)






texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg, model1005.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG", "CINC/Dist.^2"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3, 4,   # State power variables
     14, 15, 16, 17, 18,19,  # CINC-based variables
    20, 21, 22, 23, 24,25,26,27,28,29,  # More CINC-based variables
    5, 6, 7, 8, 9, 10, 11, 12, 13  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Ratio", # caprat
           "Nominal Power Ratio^2", # caprat^2
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Ratio (Dist.)",            # abseffcapdiff
  "Power Ratio (Dist.)^2",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Ratio (Log Dist.)",   # abseffcapdiff_log
    "Power Ratio (Log Dist.)^2",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
   "Power Ratio (BDM LSG)", # abseffcapdiff_BDM
    "Power Ratio (BDM LSG)^2", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG",     # log_cincdisb
   "Power Ratio (Dist.^2)", # abseffcapdiff_BDM
    "Power Ratio (Dist.^2)^2", # abseffcapdiff_BDM
     "CINC A/ Dist.^2",         # cincdisa
    "CINC B/ Dist.^2"                
  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A12.tex", 
  table = FALSE, 
  include.deviance = FALSE
)

```

```{r include=FALSE}
############Table A4{Online Appendix}

#Use of force Initiation


library(miceadds)

#CINC
model1001.wg <- glm.cluster(initdum ~ caprat + caprat2 + cap_1 + cap_2 +  distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_init)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


dat$effcaprat2 <- dat$effcaprat*dat$effcaprat
#CINC/ Raw Dist
model1002.wg <- glm.cluster(initdum ~ effcaprat + effcaprat2 + cincdisa + cincdisb +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_init)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)

dat$effcaprat_Log2 <- dat$effcaprat_Log*dat$effcaprat_Log
#CINC/ Log Dist
model1003.wg <- glm.cluster(initdum ~  effcaprat_Log + effcaprat_Log2 + log_cincdisa + log_cincdisb +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_init)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

dat$effcaprat_BDM2 <- dat$effcaprat_BDM*dat$effcaprat_BDM
#BDM Depreciated CINC
model1004.wg <- glm.cluster(initdum ~  effcaprat_BDM + effcaprat_BDM2 + BDM_cap_1 + BDM_cap_2 +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_init)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)






texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3, 4,   # State power variables
     14, 15, 16, 17, 18,19,  # CINC-based variables
    20, 21, 22, 23, 24,25,  # More CINC-based variables
    5, 6, 7, 8, 9, 10, 11, 12, 13  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Ratio", # caprat
           "Nominal Power Ratio^2", # caprat^2
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Ratio (Dist.)",            # abseffcapdiff
  "Power Ratio (Dist.)^2",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Ratio (Log Dist.)",   # abseffcapdiff_log
    "Power Ratio (Log Dist.)^2",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
   "Power Ratio (BDM LSG)", # abseffcapdiff_BDM
    "Power Ratio (BDM LSG)^2", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A4.tex", 
  table = FALSE, 
  include.deviance = FALSE
)


        
```



```{r}
#####Table A14{Online Appendix}



#Make sure dataset includes ALL MIDs on line 91, above. (I.e., make sure it is set to "0", not "3")


dat_MIDS_UoF$initdum <- ifelse(dat_MIDS_UoF$initiator == 1 & dat_MIDS_UoF$onsetdum == 1,1,0)


library(miceadds)

#CINC
model1001.wg <- glm.cluster(onsetdum ~ caprat + caprat2 + cap_1 + cap_2 +  distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat_MIDS_UoF, family = "binomial",cluster=dat_MIDS_UoF$dyad)
summary(model1001.wg)
#model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
#model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


dat_MIDS_UoF$effcaprat2 <- dat_MIDS_UoF$effcaprat*dat_MIDS_UoF$effcaprat
#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~ effcaprat + effcaprat2 + cincdisa + cincdisb +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat_MIDS_UoF, family = "binomial",cluster=dat_MIDS_UoF$dyad)
summary(model1002.wg)
#model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
#model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)

dat_MIDS_UoF$effcaprat_Log2 <- dat_MIDS_UoF$effcaprat_Log*dat_MIDS_UoF$effcaprat_Log
#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~  effcaprat_Log + effcaprat_Log2 + log_cincdisa + log_cincdisb +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat_MIDS_UoF, family = "binomial",cluster=dat_MIDS_UoF$dyad)
summary(model1003.wg)
#model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
#model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

dat_MIDS_UoF$effcaprat_BDM2 <- dat_MIDS_UoF$effcaprat_BDM*dat_MIDS_UoF$effcaprat_BDM
#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  effcaprat_BDM + effcaprat_BDM2 + BDM_cap_1 + BDM_cap_2 +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat_MIDS_UoF, family = "binomial",cluster=dat_MIDS_UoF$dyad)
summary(model1004.wg)
#model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
#model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)






texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3, 4,   # State power variables
     14, 15, 16, 17, 18,19,  # CINC-based variables
    20, 21, 22, 23, 24,25,  # More CINC-based variables
    5, 6, 7, 8, 9, 10, 11, 12, 13  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Ratio", # caprat
           "Nominal Power Ratio^2", # caprat^2
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Ratio (Dist.)",            # abseffcapdiff
  "Power Ratio (Dist.)^2",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Ratio (Log Dist.)",   # abseffcapdiff_log
    "Power Ratio (Log Dist.)^2",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
   "Power Ratio (BDM LSG)", # abseffcapdiff_BDM
    "Power Ratio (BDM LSG)^2", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A14.tex", 
  table = FALSE, 
  include.deviance = FALSE
)





```












```{r}

# Load necessary libraries
library(MASS)
library(boot)
library(ggplot2)
library(dplyr)

# ---------------------------
# 📌 First Model: caprat + caprat2
# ---------------------------
figure7 <- dat

figure7 <- subset(figure7,!is.na(figure7$demauta))
figure7 <- subset(figure7,!is.na(figure7$demautb))
figure7 <- subset(figure7,!is.na(figure7$demautinter))
figure7$caprat2 <- figure7$caprat * figure7$caprat 

func1 <- glm.cluster(onsetdum ~ cap_1 + cap_2 + caprat + caprat2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, 
                     data=figure7, 
                     family = "binomial", 
                     cluster=figure7$dyad, 
                     weights = figure7$weight_onset)

lwg.range <- seq(from = min(figure7$caprat), to = max(figure7$caprat), by = 0.01)

# Create matrix for predicted values
x.lo1 <- c(1, median(figure7$cap_1), median(figure7$cap_2), median(figure7$caprat), 
           median(figure7$caprat2), median(figure7$distance), median(figure7$majpow1),
           median(figure7$majpow2), median(figure7$contig), median(figure7$year), median(figure7$demauta), median(figure7$demautb), median(figure7$demautinter), median(figure7$ally))

X.lo1 <- matrix(x.lo1, nrow = length(x.lo1), ncol = length(lwg.range))
X.lo1[4,] <- lwg.range
X.lo1[5,] <- lwg.range^2 

B.tilde1 <- mvrnorm(1000, coef(func1), vcov(func1)) 
s.lo1 <- apply(inv.logit(B.tilde1 %*% X.lo1), 2, quantile, c(0.025, 0.5, 0.975))

# Store results in a dataframe
df1 <- data.frame(caprat = lwg.range, 
                  lower = s.lo1[1,], 
                  median = s.lo1[2,], 
                  upper = s.lo1[3,], 
                  model = "caprat")

# ---------------------------
# 📌 Second Model: effcaprat + effcaprat2
# ---------------------------
figure7$effcaprat2 <- figure7$effcaprat * figure7$effcaprat 

func2 <- glm.cluster(onsetdum ~ cincdisa + cincdisb + effcaprat + effcaprat2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, 
                     data=figure7, 
                     family = "binomial", 
                     cluster=figure7$dyad, 
                     weights = figure7$weight_onset)

lwg.range2 <- seq(from = min(figure7$effcaprat), to = max(figure7$effcaprat), by = 0.01)

# Create matrix for predicted values
x.lo2 <- c(1, median(figure7$cincdisa), median(figure7$cincdisb), median(figure7$effcaprat), 
           median(figure7$effcaprat2), median(figure7$distance), median(figure7$majpow1),
           median(figure7$majpow2), median(figure7$contig), median(figure7$year), median(figure7$demauta), median(figure7$demautb), median(figure7$demautinter), median(figure7$ally))

X.lo2 <- matrix(x.lo2, nrow = length(x.lo2), ncol = length(lwg.range2))
X.lo2[4,] <- lwg.range2
X.lo2[5,] <- lwg.range2^2 

B.tilde2 <- mvrnorm(1000, coef(func2), vcov(func2)) 
s.lo2 <- apply(inv.logit(B.tilde2 %*% X.lo2), 2, quantile, c(0.025, 0.5, 0.975))

# Store results in a dataframe
df2 <- data.frame(caprat = lwg.range2, 
                  lower = s.lo2[1,], 
                  median = s.lo2[2,], 
                  upper = s.lo2[3,], 
                  model = "effcaprat")


# ---------------------------
# 📌 Third Model: effcaprat_Log + effcaprat_Log2
# ---------------------------
figure7$effcaprat_Log2 <- figure7$effcaprat_Log * figure7$effcaprat_Log 

func2 <- glm.cluster(onsetdum ~ log_cincdisa + log_cincdisb + effcaprat_Log + effcaprat_Log2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, 
                     data=figure7, 
                     family = "binomial", 
                     cluster=figure7$dyad, 
                     weights = figure7$weight_onset)

lwg.range2 <- seq(from = min(figure7$effcaprat), to = max(figure7$effcaprat), by = 0.01)

# Create matrix for predicted values
x.lo2 <- c(1, median(figure7$log_cincdisa), median(figure7$log_cincdisb), median(figure7$effcaprat_Log), 
           median(figure7$effcaprat_Log2), median(figure7$distance), median(figure7$majpow1),
           median(figure7$majpow2), median(figure7$contig), median(figure7$year), median(figure7$demauta), median(figure7$demautb), median(figure7$demautinter), median(figure7$ally))

X.lo2 <- matrix(x.lo2, nrow = length(x.lo2), ncol = length(lwg.range2))
X.lo2[4,] <- lwg.range2
X.lo2[5,] <- lwg.range2^2 

B.tilde2 <- mvrnorm(1000, coef(func2), vcov(func2)) 
s.lo2 <- apply(inv.logit(B.tilde2 %*% X.lo2), 2, quantile, c(0.025, 0.5, 0.975))

# Store results in a dataframe
df3 <- data.frame(caprat = lwg.range2, 
                  lower = s.lo2[1,], 
                  median = s.lo2[2,], 
                  upper = s.lo2[3,], 
                  model = "effcaprat_log")



# ---------------------------
# 📌 Fourth Model: effcaprat_Log + effcaprat_Log2
# ---------------------------
figure7$effcaprat_BDM2 <- figure7$effcaprat_BDM*figure7$effcaprat_BDM 

func2 <- glm.cluster(onsetdum ~  BDM_cap_1 + BDM_cap_2 + effcaprat_BDM+ effcaprat_BDM2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, 
                     data=figure7, 
                     family = "binomial", 
                     cluster=figure7$dyad, 
                     weights = figure7$weight_onset)

lwg.range2 <- seq(from = min(figure7$effcaprat), to = max(figure7$effcaprat), by = 0.01)

# Create matrix for predicted values
x.lo2 <- c(1, median(figure7$BDM_cap_1), median(figure7$BDM_cap_2), median(figure7$effcaprat_BDM), 
           median(figure7$effcaprat_BDM2), median(figure7$distance), median(figure7$majpow1),
           median(figure7$majpow2), median(figure7$contig), median(figure7$year), median(figure7$demauta), median(figure7$demautb), median(figure7$demautinter), median(figure7$ally))

X.lo2 <- matrix(x.lo2, nrow = length(x.lo2), ncol = length(lwg.range2))
X.lo2[4,] <- lwg.range2
X.lo2[5,] <- lwg.range2^2 

B.tilde2 <- mvrnorm(1000, coef(func2), vcov(func2)) 
s.lo2 <- apply(inv.logit(B.tilde2 %*% X.lo2), 2, quantile, c(0.025, 0.5, 0.975))

# Store results in a dataframe
df4 <- data.frame(caprat = lwg.range2, 
                  lower = s.lo2[1,], 
                  median = s.lo2[2,], 
                  upper = s.lo2[3,], 
                  model = "effcaprat_BDM")


```





```{r}

# ---------------------------
# 📌 Combine Both Data Frames and Plot
# ---------------------------
df_combined <- bind_rows(df1, df2,df3, df4)

ggplot(df_combined, aes(x = caprat, y = median, color = model, fill = model)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  labs(title = "Predicted Probability of Use of Force",
       x = "Capabilities Ratio",
       y = "Predicted Probability",
       color = "Model",
       fill = "Model") +
  theme_minimal() +
   ylim(0, NA)


df_nom_vs_eff <- bind_rows(df1, df2)



library(ggplot2)

ggplot(df_nom_vs_eff, aes(x = caprat, y = median, linetype = model)) +
  geom_line(size = 1, color = "black") +  # Black lines
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.2) +  # Grey shaded ribbon
  labs(
    title = "Predicted Probability of Use of Force  (Raw Distance)",
    x = "Capabilities Ratio",
    y = "Predicted Probability",
    linetype = "Measurement of Power"  # Updated legend title
  ) +
  scale_linetype_manual(
    values = c("caprat" = "dashed", "effcaprat" = "solid"),  # Custom linetypes
    labels = c("caprat" = "Nominal", "effcaprat" = "Proximate")  # Rename legend labels
  ) +
  theme_minimal(base_family = "Times") +  # Times New Roman font
  theme(
    text = element_text(color = "black"),  # Black font for all text
    legend.position = c(0.85, 0.85),       # Top-right corner of the plot
    legend.title = element_text(face = "bold"),  # Bold legend title
    legend.text = element_text(size = 10),       # Legend text size
    legend.key.width = unit(2, "cm"),            # Increase legend key width for line visibility
    legend.key.height = unit(0.5, "cm"),         # Adjust legend key height
    legend.background = element_rect(color = "black", fill = "white"),  # Box around legend
    panel.grid = element_blank(),                # Remove grid lines
    axis.ticks = element_line(color = "black"),  # Add tick marks
    axis.ticks.length = unit(0.15, "cm"),        # Length of tick marks
    axis.line = element_line(color = "black")    # Add axis lines
  ) +
  ylim(0, 0.000000012)  # Set y-axis limits


df_nom_vs_eff_log <- bind_rows(df1, df3)



library(ggplot2)

ggplot(df_nom_vs_eff_log, aes(x = caprat, y = median, linetype = model)) +
  geom_line(size = 1, color = "black") +  # Black lines
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.2) +  # Grey shaded ribbon
  labs(
    title = "Predicted Probability of Use of Force  (Logged Distance)",
    x = "Capabilities Ratio",
    y = "Predicted Probability",
    linetype = "Measurement of Power"  # Updated legend title
  ) +
  scale_linetype_manual(
    values = c("caprat" = "dashed", "effcaprat_log" = "solid"),  # Custom linetypes
    labels = c("caprat" = "Nominal", "effcaprat_log" = "Proximate")  # Rename legend labels
  ) +
  theme_minimal(base_family = "Times") +  # Times New Roman font
  theme(
    text = element_text(color = "black"),  # Black font for all text
    legend.position = c(0.85, 0.85),       # Top-right corner of the plot
    legend.title = element_text(face = "bold"),  # Bold legend title
    legend.text = element_text(size = 10),       # Legend text size
    legend.key.width = unit(2, "cm"),            # Increase legend key width for line visibility
    legend.key.height = unit(0.5, "cm"),         # Adjust legend key height
    legend.background = element_rect(color = "black", fill = "white"),  # Box around legend
    panel.grid = element_blank(),                # Remove grid lines
    axis.ticks = element_line(color = "black"),  # Add tick marks
    axis.ticks.length = unit(0.15, "cm"),        # Length of tick marks
    axis.line = element_line(color = "black")    # Add axis lines
  ) +
  ylim(0, 0.000000012)  # Set y-axis limits





df_nom_vs_eff_bdm <- bind_rows(df1, df4)


library(ggplot2)

ggplot(df_nom_vs_eff_bdm, aes(x = caprat, y = median, linetype = model)) +
  geom_line(size = 1, color = "black") +  # Black lines
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.2) +  # Grey shaded ribbon
  labs(
    title = "Predicted Probability of Use of Force (BDM)",
    x = "Capabilities Ratio",
    y = "Predicted Probability",
    linetype = "Measurement of Power"  # Updated legend title
  ) +
  scale_linetype_manual(
    values = c("caprat" = "dashed", "effcaprat_BDM" = "solid"),  # Custom linetypes
    labels = c("caprat" = "Nominal", "effcaprat_BDM" = "Proximate")  # Rename legend labels
  ) +
  theme_minimal(base_family = "Times") +  # Times New Roman font
  theme(
    text = element_text(color = "black"),  # Black font for all text
    legend.position = c(0.85, 0.85),       # Top-right corner of the plot
    legend.title = element_text(face = "bold"),  # Bold legend title
    legend.text = element_text(size = 10),       # Legend text size
    legend.key.width = unit(2, "cm"),            # Increase legend key width for line visibility
    legend.key.height = unit(0.5, "cm"),         # Adjust legend key height
    legend.background = element_rect(color = "black", fill = "white"),  # Box around legend
    panel.grid = element_blank(),                # Remove grid lines
    axis.ticks = element_line(color = "black"),  # Add tick marks
    axis.ticks.length = unit(0.15, "cm"),        # Length of tick marks
    axis.line = element_line(color = "black")    # Add axis lines
  ) +
  ylim(0, 0.000000012)  # Set y-axis limits


```
```{r}
library(ggplot2)
library(patchwork)
library(cowplot)  # Required for get_legend()
library(scales)   # Required for scientific notation

# First plot: CINC/Dist. (No legend)
plot1 <- ggplot(df_nom_vs_eff, aes(x = caprat, y = median, linetype = model)) +
  geom_line(size = 1, color = "black") +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.2) +
  labs(title = "CINC/Dist.", x = "Capabilities Ratio", y = "Predicted Probability") +
  scale_linetype_manual(values = c("caprat" = "dashed", "effcaprat" = "solid"),
                        labels = c("caprat" = "Nominal", "effcaprat" = "Proximate")) +
  scale_y_continuous(labels = scientific, limits = c(0, 0.000000012)) + # Scientific notation
  theme_minimal(base_family = "Times") +
  theme(text = element_text(color = "black"), legend.position = "none",
        panel.grid = element_blank(), axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.15, "cm"), axis.line = element_line(color = "black"))

# Second plot: CINC/log(Dist.) (No legend, no y-axis labels)
plot2 <- ggplot(df_nom_vs_eff_log, aes(x = caprat, y = median, linetype = model)) +
  geom_line(size = 1, color = "black") +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.2) +
  labs(title = "CINC/log(Dist.)", x = "Capabilities Ratio") +
  scale_linetype_manual(values = c("caprat" = "dashed", "effcaprat_log" = "solid"),
                        labels = c("caprat" = "Nominal", "effcaprat_log" = "Proximate")) +
  scale_y_continuous(labels = scientific, limits = c(0, 0.000000012)) + # Scientific notation
  theme_minimal(base_family = "Times") +
  theme(text = element_text(color = "black"), legend.position = "none",
        panel.grid = element_blank(), axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.15, "cm"), axis.line = element_line(color = "black"),
        axis.title.y = element_blank(), axis.text.y = element_blank())

# Third plot: BDM Depreciated (No legend, no y-axis labels)
plot3 <- ggplot(df_nom_vs_eff_bdm, aes(x = caprat, y = median, linetype = model)) +
  geom_line(size = 1, color = "black") +
  geom_ribbon(aes(ymin = lower, ymax = upper), fill = "grey70", alpha = 0.2) +
  labs(title = "BDM Depreciated", x = "Capabilities Ratio") +
  scale_linetype_manual(values = c("caprat" = "dashed", "effcaprat_BDM" = "solid"),
                        labels = c("caprat" = "Nominal", "effcaprat_BDM" = "Proximate")) +
  scale_y_continuous(labels = scientific, limits = c(0, 0.000000012)) + # Scientific notation
  theme_minimal(base_family = "Times") +
  theme(text = element_text(color = "black"), legend.position = "none",
        panel.grid = element_blank(), axis.ticks = element_line(color = "black"),
        axis.ticks.length = unit(0.15, "cm"), axis.line = element_line(color = "black"),
        axis.title.y = element_blank(), axis.text.y = element_blank())

# Extract the legend separately using cowplot
legend_plot <- ggplot(df_nom_vs_eff_bdm, aes(x = caprat, y = median, linetype = model)) +
  geom_line(size = 1, color = "black") +
  scale_linetype_manual(values = c("caprat" = "dashed", "effcaprat_BDM" = "solid"),
                        labels = c("caprat" = "Nominal", "effcaprat_BDM" = "Proximate")) +
  labs(linetype = "Measurement of Power") +  # Legend title
  theme_minimal(base_family = "Times") +
  theme(legend.position = "bottom",
        legend.title = element_text(face = "bold"),
        legend.text = element_text(size = 10),
        legend.key.width = unit(3, "cm"),
        legend.key.height = unit(1, "cm"),  # Reduce space around legend keys
        legend.margin = margin(t = -5, b = -5),  # Reduce top and bottom margin
        legend.background = element_rect(color = "black", fill = "white"),
        panel.grid = element_blank(),
        axis.text = element_blank(),
        axis.ticks = element_blank(),
        axis.title = element_blank())

# Extract legend as a separate plot
legend <- wrap_elements(cowplot::get_legend(legend_plot))

# Combine the plots side by side and add the legend below (closer)
combined_plot <- (plot1 + plot2 + plot3) / legend + 
  plot_layout(guides = "collect", heights = c(10, 1))  # Adjust legend height

# Save the combined plot as a PDF with minimal legend spacing
ggsave("Figure_5.pdf", combined_plot, width = 12, height = 4.5, device = "pdf")


```


```{r}
df1_precent_increase <- (df1$median[51] - df1$median[100]) / df1$median[100]
df2_precent_increase <- (df2$median[51] - df2$median[100]) / df2$median[100]
df3_precent_increase <- (df3$median[51] - df3$median[100]) / df3$median[100]
df4_precent_increase <- (df4$median[51] - df4$median[100]) / df4$median[100]





```





```{r include=FALSE}
#Using closet broder distance instead of capital distance

####Table A6{Online Appendix}


library(miceadds)

#CINC
model1001.wg <- glm.cluster(onsetdum ~ caprat + caprat2 + cap_1 + cap_2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat2, family = "binomial", cluster=dat2$dyad, weights = dat2$weight_onset)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


#CINC/ Raw Dist
dat2$effcaprat_border2 <- dat2$effcaprat_border *dat2$effcaprat_border
model1002.wg <- glm.cluster(onsetdum ~ effcaprat_border + effcaprat_border2 + cincdisa_border + cincdisb_border + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat2, family = "binomial", cluster=dat2$dyad, weights = dat2$weight_onset)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)

 dat2$effcaprat_Log_border2 <- dat2$effcaprat_Log_border*dat2$effcaprat_Log_border
#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~  effcaprat_Log_border + effcaprat_Log_border2  + log_cincdisa_border + log_cincdisb_border +  distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat2, family = "binomial", cluster=dat2$dyad, weights = dat2$weight_onset)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

#BDM Depreciated CINC

dat2$effcaprat_BDM_border2 <- dat2$effcaprat_BDM_border  * dat2$effcaprat_BDM_border 
model1004.wg <- glm.cluster(onsetdum ~  effcaprat_BDM_border+ effcaprat_BDM_border2 + BDM_cap_1_border + BDM_cap_2_border +  distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat2, family = "binomial", cluster=dat2$dyad, weights = dat2$weight_onset)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)






texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3, 4,   # State power variables
     14, 15, 16, 17, 18,19,  # CINC-based variables
    20, 21, 22, 23, 24,25,  # More CINC-based variables
    5, 6, 7, 8, 9, 10, 11, 12, 13  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Ratio", # caprat
           "Nominal Power Ratio^2", # caprat^2
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Ratio (Dist.)",            # abseffcapdiff
  "Power Ratio (Dist.)^2",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Ratio (Log Dist.)",   # abseffcapdiff_log
    "Power Ratio (Log Dist.)^2",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
   "Power Ratio (BDM LSG)", # abseffcapdiff_BDM
    "Power Ratio (BDM LSG)^2", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A6.tex", 
  table = FALSE, 
  include.deviance = FALSE
)

        
```





```{r}
#####Only condsider PR Dyads


dat_pr <- subset(dat, dat$polrev ==1)



#possible negative cases total (years 1816-2010, directed dyads, all PRIO squares (720x360): 

poss <-58206988800

formatC(poss, format="e")
dat_pr_poss <- subset(dat_pr,dat_pr$onsetdum ==1)
per_actual <- length(dat_pr_poss)/poss

#weights for onset
per_sample <- length(subset(dat_pr$onsetdum, dat_pr$onsetdum == 1)) / length(dat_pr$onsetdum)
weight_pos <- per_actual / per_sample	
weight_neg <- (1 - per_actual) / (1 - per_sample)	

dat_pr$weight_onset_pr[dat_pr$onsetdum == 1] <- weight_pos
dat_pr$weight_onset_pr[dat_pr$onsetdum == 0] <- weight_neg

###Table A7{Online Appendix}



library(miceadds)



#CINC
model1001.wg <- glm.cluster(dat_pr$onsetdum ~ abscapdiff + cap_1 + cap_2 + distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally  , data=dat_pr, family = "binomial", cluster=dat_pr$dyad, weights = dat_pr$weight_onset_pr)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~  abseffcapdiff +cincdisa + cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat_pr, family = "binomial", cluster=dat_pr$dyad, weights = dat_pr$weight_onset_pr)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)


#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~ abseffcapdiff_log + log_cincdisa + log_cincdisb +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat_pr, family = "binomial", cluster=dat_pr$dyad, weights = dat_pr$weight_onset_pr)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  abseffcapdiff_BDM + BDM_cap_1 + BDM_cap_2 +  distance  + majpow1 + majpow2  + contig + year+ demauta + demautb + demautinter + ally , data=dat_pr, family = "binomial", cluster=dat_pr$dyad, weights = dat_pr$weight_onset_pr)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)

texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3,  # State power variables
    13, 14, 15, 16, 17,  # CINC-based variables
    18, 19, 20, 21,  # More CINC-based variables
    4, 5, 6, 7, 8, 9, 10, 11, 12  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Difference", # abscapdiff
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Difference (Dist.)",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Difference (Log Dist.)",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
    "Power Difference (BDM LSG)", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A7.tex", 
  table = FALSE, 
  include.deviance = FALSE,

  # Use built-in significance markers
   stars = c(0.1, 0.05, 0.01, 0.001) 
)

```




```{r include=FALSE}


####Table A8{Online Appendix}


library(miceadds)

#CINC
model1001.wg <- glm.cluster(onsetdum ~ caprat + caprat2 + cap_1 + cap_2 +  distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat_pr, family = "binomial", cluster=dat_pr$dyad, weights = dat_pr$weight_onset_pr)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


dat_pr$effcaprat2 <- dat_pr$effcaprat*dat_pr$effcaprat
#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~ effcaprat + effcaprat2 + cincdisa + cincdisb +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat_pr, family = "binomial", cluster=dat_pr$dyad, weights = dat_pr$weight_onset_pr)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)

dat_pr$effcaprat_Log2 <- dat_pr$effcaprat_Log*dat_pr$effcaprat_Log
#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~  effcaprat_Log + effcaprat_Log2 + log_cincdisa + log_cincdisb +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat_pr, family = "binomial", cluster=dat_pr$dyad, weights = dat_pr$weight_onset_pr)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

dat_pr$effcaprat_BDM2 <- dat_pr$effcaprat_BDM*dat_pr$effcaprat_BDM
#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  effcaprat_BDM + effcaprat_BDM2 + BDM_cap_1 + BDM_cap_2 +   distance + majpow1 + majpow2  + contig + year + demauta + demautb + demautinter + ally, data=dat_pr, family = "binomial", cluster=dat_pr$dyad, weights = dat_pr$weight_onset_pr)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)






texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  
  # Step 1: Reorder Variables Using Their Numeric Positions
  reorder.coef = c(
    1, 2, 3, 4,   # State power variables
     14, 15, 16, 17, 18,19,  # CINC-based variables
    20, 21, 22, 23, 24,25,  # More CINC-based variables
    5, 6, 7, 8, 9, 10, 11, 12, 13  # Everything else
  ),
  
  # Step 2: Keep `custom.coef.names` in its original order from the model
  custom.coef.names = c(
        "Nominal Power Ratio", # caprat
           "Nominal Power Ratio^2", # caprat^2
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Year",                     # year
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Ratio (Dist.)",            # abseffcapdiff
  "Power Ratio (Dist.)^2",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Ratio (Log Dist.)",   # abseffcapdiff_log
    "Power Ratio (Log Dist.)^2",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
   "Power Ratio (BDM LSG)", # abseffcapdiff_BDM
    "Power Ratio (BDM LSG)^2", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),  
  
  omit.coef = "(Intercept)",  
  file = "../Paper/Tables/Table_A8.tex", 
  table = FALSE, 
  include.deviance = FALSE
)


        




```













```{r}
####Fixed Effects for Regions and Decades




dat$decade <- factor(floor(dat$year / 10) * 10)


# 1) Create dat$regiona from dat$statea
dat$regiona <- with(dat, ifelse(statea < 100, "North America",
                         ifelse(statea >= 100 & statea <= 199, "South America",
                         ifelse(statea >= 200 & statea <= 399, "Europe",
                         ifelse(statea >= 400 & statea <= 599, "Africa",
                         ifelse(statea >= 600 & statea <= 699, "Middle East",
                         ifelse(statea >= 700 & statea <= 899, "Asia",
                         ifelse(statea >= 900 & statea <= 999, "Oceania", NA))))))))

# 2) Create dat$regionb from dat$stateb using the same criteria
dat$regionb <- with(dat, ifelse(stateb < 100, "North America",
                         ifelse(stateb >= 100 & stateb <= 199, "South America",
                         ifelse(stateb >= 200 & stateb <= 399, "Europe",
                         ifelse(stateb >= 400 & stateb <= 599, "Africa",
                         ifelse(stateb >= 600 & stateb <= 699, "Middle East",
                         ifelse(stateb >= 700 & stateb <= 899, "Asia",
                         ifelse(stateb >= 900 & stateb <= 999, "Oceania", NA))))))))

# 3) Create dat$dyad_region: If regiona equals regionb, use that region; otherwise, "transregional"
dat$dyad_region <- ifelse(dat$regiona == dat$regionb, dat$regiona, "transregional")




# Run the regression with decade fixed effects
m1 <- glm.cluster(onsetdum ~ effcaprat_Log + effcaprat_Log2 + log_cincdisa + log_cincdisb +
            distance + majpow1 + majpow2 + contig + demauta + demautb +
            demautinter + ally +  factor(decade) +
             factor(dyad_region),
            data = dat,
            family = "binomial",
            cluster = dat$dyad,
            weights = dat$weight_onset)

summary(m1)

dat$statea
```



```{r include=FALSE}


####Table A9{Online Appendix}
library(miceadds)



#CINC
model1001.wg <- glm.cluster(onsetdum~ abscapdiff + cap_1 + cap_2 + distance + majpow1 + majpow2  + contig + demauta + demautb + demautinter + ally  +  factor(decade) +
             factor(dyad_region), data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~  abseffcapdiff +cincdisa + cincdisb +  distance  + majpow1 + majpow2  + contig + demauta + demautb + demautinter + ally+  factor(decade) +
             factor(dyad_region) , data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)


#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~ abseffcapdiff_log + log_cincdisa + log_cincdisb +  distance  + majpow1 + majpow2  + contig + demauta + demautb + demautinter + ally +  factor(decade) +
             factor(dyad_region), data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  abseffcapdiff_BDM + BDM_cap_1 + BDM_cap_2 +  distance  + majpow1 + majpow2  + contig + demauta + demautb + demautinter + ally +  factor(decade) +
             factor(dyad_region), data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)


texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  reorder.coef = c(
    1:3,   # State power variables
     12:20,  # CINC-based variables
    4:11 # Everything else
  ),
  omit.coef = "(Intercept)|factor\\(decade\\).*|factor\\(dyad_region\\).*",  
  file = "../Paper/Tables/Table_A9.tex", custom.coef.names = c(
        "Nominal Power Difference", # abscapdiff
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Difference (Dist.)",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Difference (Log Dist.)",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
    "Power Difference (BDM LSG)", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),
  table = FALSE, 
  include.deviance = FALSE
)



```


```{r}



####Table A10{Online Appendix}


library(miceadds)

#CINC
model1001.wg <- glm.cluster(onsetdum ~ caprat + caprat2 + cap_1 + cap_2 +  distance + majpow1 + majpow2  + contig + demauta + demautb + demautinter + ally+  factor(decade) +
             factor(dyad_region), data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1001.wg)
model1001.wg$glm_res$deviance <- adjusted_deviance(model1001.wg)
model1001.wg$glm_res$aic <- adjusted_AIC(model1001.wg)


dat$effcaprat2 <- dat$effcaprat*dat$effcaprat
#CINC/ Raw Dist
model1002.wg <- glm.cluster(onsetdum ~ effcaprat + effcaprat2 + cincdisa + cincdisb +   distance + majpow1 + majpow2  + contig + demauta + demautb + demautinter + ally+  factor(decade) +
             factor(dyad_region), data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1002.wg)
model1002.wg$glm_res$deviance <- adjusted_deviance(model1002.wg)
model1002.wg$glm_res$aic <- adjusted_AIC(model1002.wg)

dat$effcaprat_Log2 <- dat$effcaprat_Log*dat$effcaprat_Log
#CINC/ Log Dist
model1003.wg <- glm.cluster(onsetdum ~  effcaprat_Log + effcaprat_Log2 + log_cincdisa + log_cincdisb +   distance + majpow1 + majpow2  + contig + demauta + demautb + demautinter + ally+  factor(decade) +
             factor(dyad_region), data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1003.wg)
model1003.wg$glm_res$deviance <- adjusted_deviance(model1003.wg)
model1003.wg$glm_res$aic <- adjusted_AIC(model1003.wg)

dat$effcaprat_BDM2 <- dat$effcaprat_BDM*dat$effcaprat_BDM
#BDM Depreciated CINC
model1004.wg <- glm.cluster(onsetdum ~  effcaprat_BDM + effcaprat_BDM2 + BDM_cap_1 + BDM_cap_2 +   distance + majpow1 + majpow2  + contig + demauta + demautb + demautinter + ally+  factor(decade) +
             factor(dyad_region), data=dat, family = "binomial", cluster=dat$dyad, weights = dat$weight_onset)
summary(model1004.wg)
model1004.wg$glm_res$deviance <- adjusted_deviance(model1004.wg)
model1004.wg$glm_res$aic <- adjusted_AIC(model1004.wg)




texreg(
  list(model1001.wg, model1002.wg, model1003.wg, model1004.wg),  
  custom.model.names = c("CINC", "CINC/Dist.", "CINC/Log(Dist)", "BDM LSG"), 
  reorder.coef = c(
    1:4,   # State power variables
     13:24,  # CINC-based variables
    5:12 # Everything else
  ),
  omit.coef = "(Intercept)|factor\\(decade\\).*|factor\\(dyad_region\\).*",  
  file = "../Paper/Tables/Table_A10.tex", custom.coef.names = c(
        "Nominal Power Ratio", # caprat
           "Nominal Power Ratio^2", # caprat^2
    "State A power",            # cap_1
    "State B power",            # cap_2
 "Distance between Capitals",                 # distance
    "Major Power A",            # majpow1
    "Major Power B",            # majpow2
    "Contiguous",               # contig
    "Democracy A",              # demauta
    "Democracy B",              # demautb
    "Democracy A * Democracy B",# demautinter
    "Ally",                     # ally
        "Power Ratio (Dist.)",            # abseffcapdiff
  "Power Ratio (Dist.)^2",            # abseffcapdiff
    "CINC A/ Distance",         # cincdisa
    "CINC B/ Distance",         # cincdisb
    "Power Ratio (Log Dist.)",   # abseffcapdiff_log
    "Power Ratio (Log Dist.)^2",   # abseffcapdiff_log
    "CINC A/log(Distance)",     # log_cincdisa
    "CINC B/log(Distance)",     # log_cincdisb
   "Power Ratio (BDM LSG)", # abseffcapdiff_BDM
    "Power Ratio (BDM LSG)^2", # abseffcapdiff_BDM
    "CINC A BDM LSG",           # BDM_cap_1
    "CINC B BDM LSG"           # BDM_cap_2

  ),
  table = FALSE, 
  include.deviance = FALSE
)
        




```

